% 

%%%%%%% small_network_mcmc.m %%%%%%% 
% 

% 

% This program utilizes a Markov Chain Monte Carlo 

% method for the prediction of the most likely configuration 

% or state of a regulatory network. 



% 
% 



% smg - Last Modified: 8/7/00 

frame_count = 0; 
rejected = 0; 
edge^number (1 , 1) = 0; 

% Ask user for set-up information 

configuration = input {'Starting from scratch (0) or from an initial Adjacency mtx (1) 

;); 

if configuration == 1 

A_dir = input ('Enter path and filename for Adjacency matrix: '); 
else 

initial__edges = input ('Input initial number of edges: '); 

^nd 

©iput_dir = input {'Enter directory for probability matrices: •); 
g?t:erations = input ('Input number of iterations: •); 
^urn_in = input ('Enter niimber of burn- in iterations: 

llirectory = input Enter directory of input files in single quotes: *); 
^ = input ('Enter maximum nimnber of edges to sample (change) at one time: '); 
^ve_interval = input ( ' Enter save interval : ' ) ; 



J i 



Load precalculated arrays 
|c?d(input_dir) ; 

^oad edge_matrix; % used for sampling edges 

ll%>ad edge_probabilities; 

probarray = edge_probabilities ; % probability of an edge 
Hi ear edge_probabilities; 
load edge__probabilities_zero 

probarray_zero = edge_probabilities_zero; % probability of not having edge 
clear edge_jprobabilities_2ero; 

% Load look-up table for In of factorial 

load Infactxt -ascii; 

Infac = reshape (Infac ', 10008, 1) ; 



% Load data 
cd (directory) ; 
names = dir; 

% get vertices 

for n = 3 : length (names) ; %3-708 (Curagen) 3-13(test) 3-641(DIP) - process each file 

filename = names (n) .name; 
dataln = load(f ilename) ; 
protein {n-2) .id = n-2; 
newname = strtok( filename, ' . ' ) ; 
newname = str2num( newname) ; 
protein (n-2) .name - newname; 
protein ^iv-2K domains = datain(l :end) ; 
idnamelist (n-2) = newname; 



end 

len = length (idnamel is t) ; 



% number of vertices 



edge_freq = sparse (len, len) ; 

% Set up random edges between vertices 
if configuration == 0 
A = sparse ( len, len) ; 
for i = 1 : initial_edges 

i_init = 1 + floor (rand* len) ; 
j_init = 1 + floor (rand* len) ; 
A(i_init, j_init) = 1; 
end 
else 

% must be called Alast - Make more user friendly 
load(A_dir) ; 

A = Alast; % A = Alast; 
clear Alast; 

end 



edgesinit = nnz (A) % number of interactions 

||?gevector = sum (A, 2); % number of outgoing edges for each vertex 

wedgevector = sum(A) ; % number of incoming edges for each vertex 

plge_jiumber ( 1 , 1 ) = edgesinit; 

fej Calculate edge probabilities 

fejprob_edges is the sum of log(prob) for each existing edge 
|i;prob_edges_zero is the sum of log{prob) for each non-existant edge 
]^ob_edges = 0; 
prob_edges_zero = 0; 
5f6r i = l:len 

for j = l:len 
W if A(i,j) == 1 

lU prob_edges = prob_edges + log (probarray (i, j ) ) ; 

ii^';: e 1 s e 

1^5. prob_edges_zero = prob_edges_zero + log (probarray__zero ( i , j ) ) ; 

L?; end 

&id 



%%%%%%%%%%%%%%%% Calculate system probability %%%%%%%%%%%%%%%% 
LOGPTRANS = prob_edges; 
LOGPTRANSzero = prob_edges_zero ; 



%%%%%%%%%%%%%%%%% Incoming edge probability - Multinomial 

% Determine the number of vertices with 0,1,.. incoming edges 

invertexedgevect = zeros (1, max (inedgevector)+l ) ; 

for i = 1: length (inedgevector) % i = 1 is vert w/ zero incoming edges 

invertexedgevect ( inedgevector ( i ) +1) = invertexedgevect ( inedgevector ( i ) +1 ) + 

end 

% Multinomial dist. 

MIN = sum ( invertexedgevect ) ; % Total niunber of vertices 

LOGPIN = 0; 
LOGDENIN = 0; 

for i = 1: length (invertexedgevect) 

LOGPIN = LOGPIN + invertexedgevect (i ) *log (probinedge (i-1) ) ; 

LOGDENIN = LOGDENIN + Infac (invertexedgevect (i) + 1); 
end 

LOGNUMIN lufac(MIN+l) ; 



%%%%%%%%%%%%%%%%%%%% Outgoing edge probability - Multinomial 
% Determine the number of vertices with 0,1, , . outgoing edges 
vertexedgevect = zeros ( 1 , max (edgevector ) +1 ) ; 

for i = 1: length (edgevector) % i = 1 is vertex w/ 0 outgoing edges 

vertexedgevect (edgevector (i)+l) = vertexedgevect (edgevector (i) +1) + 1; 
end ' 

% Multinomial dist, 

M = sum (vertexedgevect ) ; % Total number of vertices {= MIN) 

LOGPOUT = 0; 
LOGDEN = 0; 

for i = 1: length (vertexedgevect) 

LOGPOUT = LOGPOUT + vertexedgevect (i) * log (edgedist (i-1) ) ; 
LOGDEN = LOGDEN + Infac (vertexedgevect (i) + 1); 

end 

LOGNUM = lnfac(M+l); 

LOGDEN; 

LOGPOUT ; 



i%f Likelihood of system in initial configuration 

SiDGSYSX = (LOGNUM- LOGDEN) +LOGPOUTH- (LOGNUMIN-LOGDENIN) +LOGPIN+LOGPTRANS+LOGPTRANS 
;ft>rintf( 'Initial Probability = %5 , 4E\n\n\ LOGSYSX) 

Save initial configuration - for reference 
ppGSYSXorig = LOGSYSX; 
i^Lprig = A; 

edgevectororig = edgevector; 
^inedgevectororig = inedgevector; 
J|f4rtexedgevectorig = vertexedgevect; 
Siivertexedgevectorig = invertexedgevect; 

Add or remove edge at random & recompute likelihood 

flag =1; % for testing purposes 

if flag == 1; 

prob_edgescomp = prob_edges; 
prob_edges_zerocomp = prob_edges_zero; 
edgevectorcomp = edgevector; 
inedgevectorcomp = inedgevector; 
Acomp = A; 



% set up matrix for observing frequency of edge sampling 
edge_freq = zeros (len, len) ; 



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
tic; 

for X = 1: iterations 

% choose to add or remove 
if rand < 0.5 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% add edge 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 



% create AO matrix 
AO = {A==0); 



f 
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% select edge from those that are currently = 0 
[edges, a-x2y,q_y2x] = edge_selection(ks, AO, Infac) ; 

[edge_rows,edge_cols] = size (edges); 
% update parameters 
for t = 1 : edge_rows 

A(edges(t,l) ,edges(t,2) ) = 1; 

prob_edges = prob_edges + log (probarray (edges (t, 1) , edges (t, 2) )) ; 
prob_edges_zero = prob_edges_zero - log (probarray_zero (edges (t, 1) , edges (t, 2) )) ; 
edgevector (edges (t,l) ) = edgevec tor (edges (t, 1) ) + 1; % update edge vectors 

inedgevec tor ( edges ( t , 2 ) ) = inedgevector ( edges ( t , 2 ) ) + 1 ; 
edge_freq(edges(t,l) /edges(t,2) ) = edge_freq( edges (t, 1) , edges (t, 2) ) + 1; 

end 



% flag showing state 
add_edge = 1; 

else 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% remove an edge 

% select preferentially from proteins that should be less well connected 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% select edge to remove 

[edges, c3ux2y,qLy2x] = edge_selection{ks,A,lnfac) ; 
% change edge 

[edge„rows,edge_cols] = size (edges); 

% update parameters 
for t = l;edge_rows 

A(edges(t,l) ,edges{t,2) ) = 0; 

prob_edges = prob^edges - log (probarray (edges (t, 1) , edges (t, 2) )) ; 
prob_edges_2ero = prob_edges_zero -f log (probarray_zero (edges ( t, 1) , edges (t, 2 ))) ; 
edgevector (edges ( t, 1) ) = edgevec tor (edges (t, 1) ) - 1; 
inedgevector (edges (t, 2) ) = inedgevector (edges (t, 2) ) - 1; 
edge_freq(edges(t,l) ,edges(t,2) ) = edge_f req ( edges ( t , 1 ), edges { t , 2 ) ) + 1; 

end 



% flag showing state 
add_edge = 0; 

end 



%%%%%%%%%%% 

% Essentially, a repeat of initial system probability calculation 

LOGPTRAiqSY = prob^edges; % sum of transition probabilities for 

LOGPTRA]SiSzeroY = prob_edges_zero; % new system (w/ or w/o new edge) 

%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Incoming edge probability 

invertedgenew = zeros (l,max(inedgevector) +1) ; % # of vert, w/ 0,1,... edges 
for i = 1: length (inedgevector) % i = 1 -> vert w/ 0 incoming edges 

invertedgenew( inedgevector (i)+l) = invertedgenew(inedgevector {i)+l) + 1; 

end 

MINY = sum (invertedgenew) ; % Calc Multinomial 
LOGPINY = 0; 



LOGDENINY = 0; 

for i ^ 1: length (invertedgenew) 

LOGPINY = LOGPINY + invertedgenew(i) *log (probinedge (i-l) ) ; 
LOGDENINY = LOGDENINY + Inf ac ( invertedgenew ( i ) + 1 ) ; 

end 

LOGNUMINY = Infac (MINY+l) ; 



%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Outgoing edge probability 
vertedgenew = zeros {1, max (edgevec tor) +1) ; 
for i = 1 : length (edgevec tor) 

vertedgenew ( edgevector ( i ) +1) = vertedgenew ( edgevec tor ( i ) +1 ) + 1 
end 

MY = stun (vertedgenew) ; % Calc Multinomial 

LOGPOUTY - 0; 
LOGDENY = 0; 

for i = 1: length (vertedgenew) 

LOGPOUTY = LOGPOUTY + vertedgenew (i ) *log (edgedist ( i-l )) ; 

LOGDENY = LOGDENY + Inf ac (vertedgenew ( i ) + 1) ; 
end 

LOGNUMY = lnfac(MY+l); 

i i 

jr"i %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% Probability of new configuration 
LOGSYSY = (LOGNUMY-LOGDENY)+LOGPOUTY+(LOGNUMINY- 
^DGDENINY) +LOGPINY+LOGPTRANSY+LOGPTRANSzeroY ; 

S it ; 

% Check if iterations>burn_in & establish plotting interval based 
J: % the number of edges existing after burn in 
1^* if X == burn_in 

%save_interval = nnz (A) ; 
M? %Astart = A; 
f'i Asave = A; 
pii saves = 1; 

SYSVECT ( saves , 1 ) = x; 

SYSVECT ( saves , 2 ) = LOGSYSX; 

edge_number ( 1 , saves ) =nnz (A) ; 

frame_count = 1; 

frame ( f rame_count ) , ad j = Asave ; 

frame ( frame_count ) .edge_num = nnz (A) ; 

end 



% Save configuration after every ' save_interval * iterations 
if X > bum_in & rem(x, save^interval) == 0 
Asave = Asave + A; 

saves = saves + 1; 
SYSVECT (saves, 1) = x; 
SYSVECT(saves,2) = LOGSYSX; 
^dge_number ( 1 , saves ) = nnz ( A) ; 

end 



% Save "frames" for animation of edge probabilities 
if X > burn_in & r€m(x, 200*save_interval) == 0 

frame_count = frame_count + 1; 

frame ( f rame_count ) . ad j = Asave ; 

frame (frame_co\int) .edge_num = nnz (A) ; 

end 
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%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

% probability of accepting edge 

paccept = exp((LOGSYSY + g y2x) - (LOGSYSX + q_x2y) ) ; 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

if rand <= paccept % decide whether or not to keep change 

% keep change 
Acomp = A; 

edgevectorcomp = edgevector; 
inedgevectorcomp = inedgevector; 
prob_edgescomp = prob_edges; 
prob„edges_2:erocomp = prob__edges_zero; 

f printf ( • %5 . 0d\tpaccept=%3 . 2E\ tEdges=:%4d\nLOGOLD=%5 . 4E\tLOGNEW=%5 . 4E\ tDif f =%5 4E\n ' 
X, paccept, nnz (A) , LOGSYSX, LOGS YSY,LOGSYSY- LOGSYSX) 

fprintf('q:„x2y=: %5.4E\tq_y2x= %5,4E\n\n',q„x2y,q_y2x) 

LOGSYSX = LOGSYSY; 

gl % update sampling distributions appropriately when an edge is added 
% or removed 

. '! else 

W % proposed addition or removal of edge is not accepted 
4^? rejected = rejected + 1; 

§-\ A = Acomp; % reset values to that of the previous iteration 

edgevector = edgevectorcomp; 

inedgevector = inedgevectorcomp; 

prob_ edges = prob„edgescomp; 
W prob_edges_zero = prob_edges„zerocomp; 
li' end 
^d 

'/If 



%save d:\apps\matlabrll\work\dipdata\Astart.mat Astart; 
%save d:\apps\matlabrll\work\dipdata\Asave.mat Asave; 

rejected/iterations % percent of changes rejected 

nnz (A) 

%f igure; 

%gplot ( Abest , nodeloc , * -o ' ) ; 

clear Acomp; 
clear AO; 



figure; 

plot(SysVECT(:,l),SYSVECT(:,2)); % Plot system probability at each ' save interval ' 

xlabel ( * Iteration Number ' ) 

ylabel ( »Log(L) ' ) 

title (['MCMC test ~ •,date]) 

%text (14000, -1608, '1500 edges-start, ### edges-end, s = 0.025, 50% Rejection^) 
figure; 

pcolor (Asave . /saves) ; 
shading interp 
colorbar ( ' v ' ) 
xlabel ( • Vertex ' ) 



ylabel ( ' Vertex • ) 

title ( 'Fraction of Time Edge is Occupied') 
figure; 

plot ( edge_nuinber ) 
xlabel ( ' Save Number • ) 
ylabel ( "Number of Edges ' ) 

figure; 

pcolor (edge_f req) ; 
shading interp 
colorbar ( ' v' ) 

ti tie ( 'Frequency of Edge Sampling'); 
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function Cout,cux2y,qjy2x3 = edge_selection(ks, A, Infac) 
% 

% out = edge_selection(ks,A,lnfac) 

% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix. 
% 'ks' is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len*len; 

% find all I's in A 

[source, target] = find(A==l); 

% number of interactions available for sampling 
edges^avail = length (source) ; 

number of edges to be sampled 
3^ges_sampled = 1 + f loor {rand*ks) ; %rand*edges_avail 

%\ 

^%:;make sure you don't sample from more than available 
4f edges_sampled > edges_avail 

edges_sampled = edges_avail; %useful only for ks case 

gnd 

I^Jmix edges to be sampled 
plioose_from„this = randperm(edges_avail) ; 

^> Sample edges from 1 to edges_sampled 

g^t = zeros (edges_sampled, 2) ; %initialize out 

Z 7. I 

b"Ut ( : , 1) = source (choose_from_this (1 : edges_sampled) ) ; 
out (: ,2) = target ( choose_f rom_this ( 1 : edges_sampled) ) ; 



% Calculate the proposal distribution from new to old and old to new 
qx = 0; 
qy = 0; 

opp^edges = total_edges - edges_avail + edges_sampled; 

for i = l:edges_sampled 

qx = qx + log( (edges^sampled - i + 1) / (edges_avail - i + 1)); 
qy = qy + log( (edges_sampled - i + l)/{opp_edges - i +1)); 

end 

q_x2y = qx; 
qL:72x = qy; 



4 

function [out, q_x2y, q_y2x] = edge_selection_old{ks, A, Inf ac) 
% 

% out = edge_selection_old(ks, A, Infac) 
% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix, 

% 

% Input A is an adjacency (or inverse adjacency) matrix, 
% 'ks' is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len* len; 

% find all I's in A 

[source, target] = find(A==l) ; 

% number of interactions available for sampling 
edges^avail = length ( source ) ; 

nimber of edges to be sampled 
P|iges_sampled = 1 + floor (rand*ks) ; %rand*edges_avail 

^p^imake sure you don't sample from more than available 
^ edges_sampled > edges_avail 

p edges_sampled = edges_avail; %useful only for ks case 

^iid 

s ' 

sample interactions available and place x and y coordinates in "out" 
ait = 2eros(edges_sampled,2) ; %initiali2e out 
|£rows,cols] = size (out ); 

Miile i <= edges_sampled 

t:l X = 1 + floor (rand*edges_avail) ; % pick vertex 

for n = lirows 

if intersect ( [source (x) , target (x) ], out, 'rows' ) 

% sampling previously sampled point 

% try another 

break; 
else 

% previously unsampled - place in out matrix 
out(i,l) = source(x); 
out(i,2) = target (x); 
i = i + 1; 

end 

end 

end 
out; 



% Calculate the proposal distribution from new to old 
qx = 0; 

qy = 0; 

opp_edges = total_edges - edges_avail + edges_sampled; 

for i = 1 : edges_sampled 

qx = qx + log( (edges_sampled i + 1) / (edges_avail - i + 1)); 
qy = qy + log( (edges_sampled - i + 1) / (opp_edges - i +1)); 

end 

q_x2y = qx; 
q_y2x = qy; 



function [out,q_x2y,q_y2x] = edge_selection{ks, A, Infac) 
% 

% out = edge_selection(ks,A, Infac) 

% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix, 
% 'ks' is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len* len; 

% find all I's in A 

[source, target] = find{A==l) ; 

% number of interactions available for sampling 
edges_avail = length { source ) ; 

5 •* 

number of edges to be sampled 
gjges_sampled = 1 + f loor (rand*ks) ; %rand*edges_avail 

femake sure you don't sample from more than available 
4S edges_sampled > edges_avail 

pi edges_sampled = edges^avail; %useful only for ks case 

pi^d 

%i|mix edges to be sampled 
^oose_from„this = randperm(edges__avail) ; 

||,Sairple edges from 1 to edges^sampled 

0t = zeros{edges_sampled,2) ; %initialize out 

Htit ( : , 1) = source (choose_from_this (1 :edges__sampled) ) ; 
out ( : , 2 ) = target (choose_f rom_this ( 1 : edges_sampled) ) ; 



% Calculate the proposal distribution from new to old and old to 
qx = 0; 

qy = 0; 

opp_edges = total_edges - edges_avail + edges_sampled; 

for i = l:edges_sampled 

<pc = qx + log( {edges_sampled - i + 1) / (edges_avail - i + l)); 
qy = qy + log( {edges_sampled - i + 1 ) / ( opp_edges - i +1)); 

end 

q„x2y = qx; 
q_y2x = qy; 



function [out,q„x2y,q^2x] = edge_selection_old(ks. A, Infac) 
% 

% out = edge_selection_old(ks,A; Infac) 

% 

% 

% This program is used for randomly selecting edges 

% from an adjacency matrix. 

% 

% Input A is an adjacency (or inverse adjacency) matrix. 
% 'ks' is the maximum number of edges to be sampled. 
% 



len = length (A) ; 
total_edges = len* len; 

% find all I's in A 

[source, target] = find(A==l) ; 

% number of interactions available for sampling 
edges_avail = length (source) ; 

number of edges to be sampled 
Siges_sampled = 1 + f loor (rand*ks) ; %rand*edges_avail 

^jmake sure you don't sample from more than available 
© edges__sampled > edges_avail 

5; edges^sampled = edges_avail; %useful only for ks case 

Snd 

gi sample interactions available and place x and y coordinates in "out" 
at = zeros(edges_sampled,2) ; %initiali2e out 
ttows,cols] = size(out); 

Miile i <= edges_sampled 

IkJ x = 1 + floor (rand* edges_avail) ; % pick vertex 

pJ for n = l:rows 

if intersect ( [source (x) , target (x) ] ,out, 'rows' ) 
% sampling previously sampled point 
% try another 
break; 
else 

% previously unsampled - place in out matrix 
out(i,l) = source(x); 
out ( i , 2 ) = target (x) ; 
i = i + 1; 

end 

end 

end 
out; 



% Calculate the proposal distribution from new to old 
qx = 0; 
qy = 0; 

opp_edges = total^edges - edges^avail + edges_saitpled; 

for i = 1 : edges_sampled 

qx = <pc + log ( ( edges_sampled - i + 1) / {edges_avail - i + 1)); 

qy = qy + log( (edges_sampled - i + 1} / (opp_edges - i +1)); 

end 

qL.x2y = <pc; 

qLJf2x = qy; 



function prob = edgedist (numedges) 
% 

% This function takes as input the number of outgoing 
% edges from a vertex and returns the probability of 
% this as "prob" 
% 

% Based on fits to experimental data 
% 

% smg - Last Modified: 7/22/00 
% 

if numedges == 0 

prob = 0.489751; 
else 

prob = 0.304647*numedges. (-1.9691) ; 
end 




is \ 



function y = probinedge (nimedges) 

% 

% 

% 

% 

% 



if niimedges == 0 
y = 0.306735; 
else 

y = 0.556352*nuinedges.^(-2.8037) 
end 
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